# libraries
library(tidyverse)
library(here)
library(cregg)
library(broom)
library(stargazer)
library(ggrepel)

# rep file path
rep_path = "r/replication"

# load data
df = read_rds(here(rep_path, "data", "tidy-cjt.rds"))

# theme
my_theme = function(base_size = 12, base_family = "IBM Plex Sans Condensed") {
  ret <- theme_bw(base_size, base_family) +
    theme(plot.title = element_text(size = rel(1.4), face = "bold",
                                    family = "IBM Plex Sans Condensed"),
          plot.subtitle = element_text(size = rel(1), face = "plain",
                                       family = "IBM Plex Sans Condensed Light"),
          plot.caption = element_text(size = rel(0.8), color = "grey50", face = "plain",
                                      family = "IBM Plex Sans Condensed Light",
                                      margin = margin(t = 10)),
          panel.border = element_rect(color = "grey50", fill = NA, size = 0.15),
          panel.spacing = unit(1, "lines"),
          panel.grid.minor = element_blank(),
          strip.text = element_text(size = rel(0.9), hjust = 0,
                                    family = "IBM Plex Sans Condensed", face = "bold"),
          strip.background = element_rect(fill = "#ffffff", colour = NA),
          axis.ticks = element_blank(),
          axis.title = element_text(family = "IBM Plex Sans Condensed SemiBold", face = "plain"),
          axis.title.x = element_text(margin = margin(t = 5)),
          axis.text = element_text(family = "IBM Plex Sans Condensed Light", face = "plain"),
          legend.key = element_blank(),
          legend.text = element_text(size = rel(1), family = "IBM Plex Sans Condensed Light", face = "plain"),
          legend.spacing = unit(0.1, "lines"),
          legend.box.margin = margin(t = -0.5, unit = "lines"),
          legend.margin = margin(t = 0),
          legend.position = "bottom")
  
  ret
}




# Views on gender, sexual violence:Table A.8 --------------------------------------------------

# define general formula
form = chosen_dummy ~ victim_gender + crime + perp + social_distance + perp_gender

# attribute dictionary to clean up names
cleanup = tribble(~feature, ~label, 
                  "victim_gender", "Gender of victim", 
                  "crime", "Type of crime", 
                  "perp", "Type of perpetrator", 
                  "social_distance", "Closeness to perpetrator", 
                  "perp_gender", "Gender of perpetrator")


# tests
leaders = df %>% 
  select(chosen_dummy, victim_gender, crime, perp, social_distance, 
         perp_gender, r_id, women_leaders) %>% 
  drop_na() %>% 
  cj_anova(data = ., form, id = ~r_id, by = ~women_leaders) %>% 
  as_tibble() %>% 
  mutate(outcome = "Acceptance of women leaders in community")

changes = df %>% 
  select(chosen_dummy, victim_gender, crime, perp, social_distance, 
         perp_gender, r_id, rape_change) %>% 
  drop_na() %>% 
  cj_anova(data = ., form, id = ~r_id, by = ~rape_change) %>% 
  as_tibble()  %>% 
  mutate(outcome = "Changes in sexual assault rates")

assault = df %>% 
  select(chosen_dummy, victim_gender, crime, perp, social_distance, 
         perp_gender, r_id, sexual_assault) %>% 
  drop_na() %>% 
  cj_anova(data = ., form, id = ~r_id, by = ~sexual_assault) %>% 
  as_tibble()  %>% 
  mutate(outcome = "Likelihood of sexual assault")

woman_resp = df %>% 
  select(chosen_dummy, victim_gender, crime, perp, social_distance, 
         perp_gender, r_id, woman) %>% 
  mutate(woman = ifelse(woman == 1, "woman", "man")) %>% 
  drop_na() %>% 
  cj_anova(data = ., form, id = ~r_id, by = ~woman) %>% 
  as_tibble()  %>% 
  mutate(outcome = "Respondent is a woman")

# output
f_tables = bind_rows(leaders, 
                     changes, 
                     assault,
                     woman_resp) %>% 
  select(`Model comparison` = outcome, 
         `F statistic` = `F`, 
         `P-value` = `Pr(>F)`) %>% 
  drop_na() %>% 
  mutate_if(is.double, ~round(., 2))

stargazer(f_tables, title = "Nested model comparison test of preference heterogeneity, comparing across respondent views on gender and sexual assault.", 
          label = "ftest", summary = F, rownames = F, 
          out = here(rep_path, "figures", "gender-violence.tex"))


# Enumerator gender Table A.1, Table A.2, Figure A.1 -----------------------------------------------------------------

# define general formula
form = chosen_dummy ~ victim_gender + crime + perp + social_distance + perp_gender

# attribute dictionary to clean up names
cleanup = tribble(~feature, ~label, 
                  "victim_gender", "Gender of victim", 
                  "crime", "Type of crime", 
                  "perp", "Type of perpetrator", 
                  "social_distance", "Closeness to perpetrator", 
                  "perp_gender", "Gender of perpetrator")


# test
enum_gender = cj_anova(df, form, id = ~r_id, by = ~enum_gender)

# output
stargazer(enum_gender, summary = F,
          title = "Nested model comparison test of whether estimates vary according to enumerator gender.", label = "tab:enum-gender", 
          rownames = F, 
          out = here(rep_path, "figures", "enum-gender-test.tex"))

# mfx plot
enum_fx =
  df %>%
  mutate(enum_gender = ifelse(enum_gender == "f", "Female", "Male"),
         enum_gender = as.factor(enum_gender)) %>%
  cj(form, id = ~r_id, estimate = "mm", by = ~enum_gender)

enum_fx %>% 
  left_join(cleanup) %>% 
  # get rid of perp_
  mutate(level = str_replace(level, "perp_", "")) %>% 
  ggplot(aes(x = level, y = estimate, ymin = lower, 
             ymax = upper, label = level, 
             shape = BY, color = BY)) + 
  geom_pointrange(position = position_dodge(width = .5), alpha = .8) + 
  coord_flip() +
  theme_light(base_family = "IBM Plex Sans Condensed") + 
  theme(panel.grid.major = element_blank(), legend.position = "top",
        strip.background =element_rect(fill = "black"),
        strip.text =element_text(color = "white", face = "bold")) + 
  labs(y = "Estimate", 
       x = NULL, 
       color = "Enumerator sex:", 
       shape = "Enumerator sex:") + 
  scale_y_continuous(labels = scales::percent) + 
  scale_color_brewer(palette = "Dark2") +
  facet_grid(rows = vars(label), scales = "free", switch = "x")

ggsave(here(rep_path, "figures", "enumerator-gender.pdf"), 
       device = cairo_pdf,  width = 6, height = 10)


# what about female respondent with male enumerator vs. 
# male respondent with male enumerator
cross_g = 
  df %>% 
  # only look at surveys conducted by men
  filter(enum_gender == "m")

# test
enum_gender = cj_anova(cross_g, form, id = ~r_id, by = ~woman)


# output
stargazer(enum_gender, summary = F,
          title = "Nested model comparison test of whether estimates vary depending on respondent gender when enumerator is a man.", label = "tab:enum-gender2", 
          rownames = F, 
          out = here(rep_path, "figures", "enum-gender-test2.tex"))




# Covariate X Attribute: Table A.6, Figure A.5 ---------------------------------------------------



# define general formula
form = chosen_dummy ~ victim_gender + crime + perp + social_distance + perp_gender

# attribute dictionary to clean up names
cleanup = tribble(~feature, ~label, 
                  "victim_gender", "Gender of victim", 
                  "crime", "Type of crime", 
                  "perp", "Type of perpetrator", 
                  "social_distance", "Closeness to perpetrator", 
                  "perp_gender", "Gender of perpetrator")


# get formal F-tests of difference
fstat_pol = df %>% 
  select(chosen_dummy, victim_gender, crime, perp, social_distance, 
         perp_gender, r_id, police_fct) %>% 
  drop_na() %>% 
  cj_anova(., form, id = ~r_id, by = ~police_fct) %>% 
  as_tibble() %>% 
  mutate(outcome = "Police capacity index (high vs. low)")


fstat_pac = df %>% 
  select(chosen_dummy, victim_gender, crime, perp, social_distance, 
         perp_gender, r_id, war_pac) %>% 
  drop_na() %>% 
  cj_anova(., form, id = ~r_id, by = ~war_pac) %>% 
  as_tibble() %>% 
  mutate(outcome = "PAC membership (yes vs. no)")

fstat_victim = df %>% 
  select(chosen_dummy, victim_gender, crime, perp, social_distance, 
         perp_gender, r_id, severe_dum) %>% 
  drop_na() %>% 
  cj_anova(., form, id = ~r_id, by = ~severe_dum) %>% 
  as_tibble() %>% 
  mutate(outcome = "Crime victim (yes vs. no)")


fstat_indig = df %>% 
  select(chosen_dummy, victim_gender, crime, perp, social_distance, 
         perp_gender, r_id, indigenous_lang) %>% 
  drop_na() %>% 
  cj_anova(., form, id = ~r_id, by = ~indigenous_lang) %>% 
  as_tibble() %>% 
  mutate(outcome = "Indigenous (high vs. low)")

fstat_war = df %>% 
  select(chosen_dummy, victim_gender, crime, perp, social_distance, 
         perp_gender, r_id, war_victim) %>% 
  drop_na() %>% 
  cj_anova(data = ., form, id = ~ r_id, by = ~ war_victim) %>% 
  as_tibble()%>% 
  mutate(outcome = "Civil war victim (yes vs. no)")

f_tables = bind_rows(fstat_pol, 
          fstat_pac, 
          fstat_victim, 
          fstat_indig, 
          fstat_war) %>% 
  select(`Model comparison` = outcome, 
         `F statistic` = `F`, 
         `P-value` = `Pr(>F)`) %>% 
  drop_na() %>% 
  mutate_if(is.double, ~round(., 2))

stargazer(f_tables, title = "Nested model comparison test of preference heterogeneity.", 
          label = "ftest", summary = F, rownames = F, 
          out = here(rep_path, "figures", "ftest.tex"))



# make a plot for police
police = 
  cj(df, form, id = ~r_id, estimate = "mm", by = ~police_fct)

police %>% 
  left_join(cleanup) %>% 
  # get rid of perp_
  mutate(level = str_replace(level, "perp_", ""), 
         BY = paste("", BY)) %>% 
  ggplot(aes(x = level, y = estimate, ymin = lower, ymax = upper, 
             color = BY, 
             shape = BY)) + 
  geom_pointrange(position = position_dodge(width = .5)) + 
  coord_flip() + 
  theme_light(base_family = "IBM Plex Sans Condensed") + 
  theme(panel.grid.major = element_blank(), legend.position = "top",
        strip.background =element_rect(fill = "black"),
        strip.text =element_text(color = "white", face = "bold")) + 
  labs(y = "Estimate", 
       x = NULL, 
       color = "Police Capacity:", 
       shape = "Police Capacity:") + 
  scale_y_continuous(labels = scales::percent) + 
  scale_color_brewer(palette = "Dark2") +
  facet_grid(rows = vars(label), scales = "free", switch = "x")

ggsave(here(rep_path, "figures", "police-het.pdf"), device = cairo_pdf,
       width = 6, height = 10)


# Attribute X Attribute: Table A.7, Figure A.6, Figure A.7, Figure A.8, Figure A.9 ---------------------------------------------------


# f-tests
fstatPerp = cj_anova(filter(df, !is.na(perp_gender)), 
         chosen_dummy ~ victim_gender + crime + perp + social_distance, 
         id= ~ r_id, by= ~perp_gender) %>% 
  tidy() %>% 
  mutate(outcome = "Gender of perpetrator")

fstatPerp = fstatPerp[complete.cases(fstatPerp), ]
fstatPerp = fstatPerp %>%  filter(!row_number() %in% c(1))

fstatVic = cj_anova(filter(df, !is.na(victim_gender)), 
                     chosen_dummy ~ crime + perp + social_distance + perp_gender, 
                     id= ~ r_id, by= ~victim_gender) %>% 
  tidy() %>% 
  mutate(outcome = "Gender of victim")

fstatVic = fstatVic[complete.cases(fstatVic), ]
fstatVic = fstatVic %>%  filter(!row_number() %in% c(1))

fstatCrime = cj_anova(filter(df, !is.na(crime)), 
                    chosen_dummy ~ victim_gender + perp + social_distance + perp_gender, 
                    id= ~ r_id, by= ~crime) %>% 
  tidy() %>% 
  mutate(outcome = "Type of crime")

fstatCrime = fstatCrime[complete.cases(fstatCrime), ]
fstatCrime = fstatCrime %>%  filter(!row_number() %in% c(1))

fstatSocial = cj_anova(filter(df, !is.na(social_distance)), 
                      chosen_dummy ~ victim_gender + perp + crime + perp_gender, 
                      id= ~ r_id, by= ~social_distance) %>% 
  tidy() %>% 
  mutate(outcome = "Closeness to perpetrator")

fstatSocial = fstatSocial[complete.cases(fstatSocial), ]
fstatSocial = fstatSocial %>%  filter(!row_number() %in% c(1))

fstatType = cj_anova(filter(df, !is.na(perp)), 
                       chosen_dummy ~ victim_gender + social_distance + crime + perp_gender, 
                       id= ~ r_id, by= ~perp) %>% 
  tidy() %>% 
  mutate(outcome = "Type of perpetrator")

fstatType = fstatType[complete.cases(fstatType), ]
fstatType = fstatType %>%  filter(!row_number() %in% c(1))

f_tables = bind_rows(fstatPerp, 
                     fstatVic, 
                     fstatSocial, 
                     fstatType, 
                     fstatCrime) %>% 
  select(`Model comparison` = outcome, 
         `F statistic` = statistic, 
         `P-value` = p.value) %>% 
  drop_na() %>% 
  mutate_if(is.double, ~round(., 3))

stargazer(f_tables, title = "Nested model comparison test of preference heterogeneity across attributes.", 
          label = "ftest-inter", summary = F, rownames = F, 
          out = here(rep_path, "figures", "ftest-inter.tex"))




## interactive plots (marginal means)

#conjoint: gender of perpetrator
perpetratorgender = cj(df, chosen_dummy ~ victim_gender + crime + perp + 
                         social_distance,
                        id= ~ r_id, estimate="mm", by= ~ perp_gender)

perpetratorgender %>% 
  left_join(cleanup) %>% 
  # get rid of perp_
  mutate(level = str_replace(level, "perp_", ""), 
         BY = str_replace(BY, "perp_", "")) %>% 
  ggplot(aes(x = level, y = estimate, ymin = lower, ymax = upper, 
             color = BY, 
             shape = BY)) + 
  geom_pointrange(position = position_dodge(width = .5)) + 
  coord_flip() + 
  theme_light(base_family = "IBM Plex Sans Condensed") + 
  theme(panel.grid.major = element_blank(), legend.position = "top",
        strip.background =element_rect(fill = "black"),
        strip.text =element_text(color = "white", face = "bold")) + 
  labs(y = "Estimate", 
       x = NULL, 
       color = "Gender of Perpetrator:", 
       shape = "Gender of Perpetrator:") + 
  scale_y_continuous(labels = scales::percent) + 
  scale_color_brewer(palette = "Dark2") +
  facet_grid(rows = vars(label), scales = "free", switch = "x")

ggsave(here(rep_path, "figures", "perpGender-het.pdf"), device = cairo_pdf,
       width = 6, height = 10)



#conjoint: gender of victim
victimgender = cj(df, chosen_dummy ~ crime + perp + social_distance + perp_gender,
                  id= ~ r_id, estimate="mm", by= ~ victim_gender)


victimgender %>% 
  left_join(cleanup) %>% 
  # get rid of perp_
  mutate(level = str_replace(level, "perp_", "")) %>% 
  ggplot(aes(x = level, y = estimate, ymin = lower, ymax = upper, 
             color = BY, 
             shape = BY)) + 
  geom_pointrange(position = position_dodge(width = .5)) + 
  coord_flip() + 
  theme_light(base_family = "IBM Plex Sans Condensed") + 
  theme(panel.grid.major = element_blank(), legend.position = "top",
        strip.background =element_rect(fill = "black"),
        strip.text =element_text(color = "white", face = "bold")) + 
  labs(y = "Estimate", 
       x = NULL, 
       color = "Gender of Victim:", 
       shape = "Gender of Victim:") + 
  scale_y_continuous(labels = scales::percent) + 
  scale_color_brewer(palette = "Dark2") +
  facet_grid(rows = vars(label), scales = "free", switch = "x")

ggsave(here(rep_path, "figures", "victim-het.pdf"), device = cairo_pdf,
       width = 6, height = 10)


#conjoint: Crime
crime = cj(df, chosen_dummy ~ victim_gender + perp + social_distance + perp_gender, 
            id= ~ r_id, estimate="mm", by= ~ crime)

crime %>% 
  left_join(cleanup) %>% 
  # get rid of perp_
  mutate(level = str_replace(level, "perp_", "")) %>% 
  ggplot(aes(x = level, y = estimate, ymin = lower, ymax = upper, 
             color = BY)) + 
  geom_pointrange(position = position_dodge(width = .5)) + 
  coord_flip() + 
  theme_light(base_family = "IBM Plex Sans Condensed") + 
  theme(panel.grid.major = element_blank(), legend.position = "top",
        strip.background =element_rect(fill = "black"),
        strip.text =element_text(color = "white", face = "bold")) + 
  labs(y = "Estimate", 
       x = NULL, 
       color = "Crime:", 
       shape = "Crime:") + 
  scale_y_continuous(labels = scales::percent) + 
  scale_color_brewer(palette = "Dark2") +
  facet_grid(rows = vars(label), scales = "free", switch = "x")

ggsave(here(rep_path, "figures", "crime-het.pdf"), device = cairo_pdf,
       width = 6, height = 10)

#conjoint: social distance

distance = cj(df, chosen_dummy ~ victim_gender + perp + crime + perp_gender, 
           id= ~ r_id, estimate="mm", by= ~ social_distance)

distance %>% 
  left_join(cleanup) %>% 
  # get rid of perp_
  mutate(level = str_replace(level, "perp_", "")) %>% 
  ggplot(aes(x = level, y = estimate, ymin = lower, ymax = upper, 
             color = BY)) + 
  geom_pointrange(position = position_dodge(width = .5)) + 
  coord_flip() + 
  theme_light(base_family = "IBM Plex Sans Condensed") + 
  theme(panel.grid.major = element_blank(), legend.position = "top",
        strip.background =element_rect(fill = "black"),
        strip.text =element_text(color = "white", face = "bold")) + 
  labs(y = "Estimate", 
       x = NULL, 
       color = "Closeness to Perpetrator:", 
       shape = "Closeness to Perpetrator:") + 
  scale_y_continuous(labels = scales::percent) + 
  scale_color_brewer(palette = "Dark2") +
  facet_grid(rows = vars(label), scales = "free", switch = "x")

ggsave(here(rep_path, "figures", "distance-het.pdf"), device = cairo_pdf,
       width = 7, height = 10)
